del(17p)
fit = mod.risk$d17p
psl = rowSums(data.matrix(fit$data[,-1]) %*% diag(coef(fit)[-1])) + coef(fit)[1] # multiple each PC by the PC beta, sum across PCs, add the intercept beta
# check PSL calculation
unique(round(fit$fitted.values,12) == round(exp(psl)/(1+exp(psl)),12))
## [1] TRUE
#data.table(PSL=psl,fitted_values=fit$fitted.values,ePSL=exp(psl)/(1+exp(psl))) # view data table
# split into decile
dcl = cut(psl,quantile(psl,c(0,1/10,2/10,3/10,4/10,5/10,6/10,7/10,8/10,9/10,1)),include.lowest = T,labels = F)
dat = data.table(category=fit$data$D_TRI_CF_ABNORMALITYPR11,psl=psl,decile=dcl)
# plot distribution
ggplot(dat) + geom_dotplot(aes(x=psl,fill=category),binwidth = 1/10,color="white") + theme_classic()

# count number in each category by decile
tmp1 = dat %>% filter(category=="No") %>% group_by(decile) %>% count(category,name = "n_no")
tmp2 = dat %>% filter(category=="Yes") %>% group_by(decile) %>% count(category,name = "n_yes")
cnt = data.table(merge(tmp1,tmp2,by=c("decile"),all = T) %>% dplyr::select("decile","n_no","n_yes"))
kable(cnt,align = 'c',caption = "del(17p) PSL decile counts")
del(17p) PSL decile counts
| 1 |
43 |
1 |
| 2 |
42 |
2 |
| 3 |
42 |
1 |
| 4 |
44 |
NA |
| 5 |
38 |
5 |
| 6 |
36 |
8 |
| 7 |
37 |
6 |
| 8 |
41 |
3 |
| 9 |
35 |
8 |
| 10 |
26 |
18 |
# calculate hazards ratios
a = cnt[decile%in%c(10),n_yes]
b = cnt[decile%in%c(10),n_no]
c = sum(cnt[decile%in%c(5,6),n_yes])
d = sum(cnt[decile%in%c(5,6),n_no])
hr10 = (a/b)/(c/d)
se10 = sqrt((1/a)+(1/b)+(1/c)+(1/d))
ci10_l = exp(log(hr10)-1.96*se10)
ci10_u = exp(log(hr10)+1.96*se10)
paste0("Odds Ratio: del(17p) PSL in decile 10 compared with deciles 5 + 6 = ",round(hr10,digits = 2)," (",round(ci10_l,digits = 2),"-",round(ci10_u,digits = 2),")")
## [1] "Odds Ratio: del(17p) PSL in decile 10 compared with deciles 5 + 6 = 3.94 (1.7-9.14)"
t(14;16)
fit = mod.risk$t1416
psl = rowSums(data.matrix(fit$data[,-1]) %*% diag(coef(fit)[-1])) + coef(fit)[1] # multiple each PC by the PC beta, sum across PCs, add the intercept beta
# check PSL calculation
unique(round(fit$fitted.values,12) == round(exp(psl)/(1+exp(psl)),12))
[1] TRUE
#data.table(PSL=psl,fitted_values=fit$fitted.values,ePSL=exp(psl)/(1+exp(psl))) # view data table
# split into decile
dcl = cut(psl,quantile(psl,c(0,1/10,2/10,3/10,4/10,5/10,6/10,7/10,8/10,9/10,1)),include.lowest = T,labels = F)
dat = data.table(category=fit$data$D_TRI_CF_ABNORMALITYPR8,psl=psl,decile=dcl)
# plot distribution
ggplot(dat) + geom_dotplot(aes(x=psl,fill=category),binwidth = 1/5,color="white") + theme_classic()

# count number in each category by decile
tmp1 = dat %>% filter(category=="No") %>% group_by(decile) %>% count(category,name = "n_no")
tmp2 = dat %>% filter(category=="Yes") %>% group_by(decile) %>% count(category,name = "n_yes")
cnt = data.table(merge(tmp1,tmp2,by=c("decile"),all = T) %>% dplyr::select("decile","n_no","n_yes"))
kable(cnt,align = 'c',caption = "t(14;16) PSL decile counts")
t(14;16) PSL decile counts
| 1 |
56 |
NA |
| 2 |
55 |
NA |
| 3 |
54 |
1 |
| 4 |
55 |
1 |
| 5 |
54 |
1 |
| 6 |
52 |
3 |
| 7 |
51 |
5 |
| 8 |
52 |
3 |
| 9 |
43 |
12 |
| 10 |
29 |
27 |
# calculate hazards ratios
a = cnt[decile%in%c(10),n_yes]
b = cnt[decile%in%c(10),n_no]
c = sum(cnt[decile%in%c(5,6),n_yes])
d = sum(cnt[decile%in%c(5,6),n_no])
hr10 = (a/b)/(c/d)
se10 = sqrt((1/a)+(1/b)+(1/c)+(1/d))
ci10_l = exp(log(hr10)-1.96*se10)
ci10_u = exp(log(hr10)+1.96*se10)
paste0("Odds Ratio: t(14;16) PSL in decile 10 compared with deciles 5 + 6 = ",round(hr10,digits = 2)," (",round(ci10_l,digits = 2),"-",round(ci10_u,digits = 2),")")
[1] “Odds Ratio: t(14;16) PSL in decile 10 compared with deciles 5 + 6 = 24.67 (7.99-76.19)”
amp(1q)
fit = mod.risk$a1q
psl = rowSums(data.matrix(fit$data[,-1]) %*% diag(coef(fit)[-1])) + coef(fit)[1] # multiple each PC by the PC beta, sum across PCs, add the intercept beta
# check PSL calculation
unique(round(fit$fitted.values,12) == round(exp(psl)/(1+exp(psl)),12))
## [1] TRUE
#data.table(PSL=psl,fitted_values=fit$fitted.values,ePSL=exp(psl)/(1+exp(psl))) # view data table
# split into decile
dcl = cut(psl,quantile(psl,c(0,1/10,2/10,3/10,4/10,5/10,6/10,7/10,8/10,9/10,1)),include.lowest = T,labels = F)
dat = data.table(category=fit$data$D_TRI_CF_ABNORMALITYPR13,psl=psl,decile=dcl)
# plot distribution
ggplot(dat) + geom_dotplot(aes(x=psl,fill=category),binwidth = 1/5,color="white") + theme_classic()

# count number in each category by decile
tmp1 = dat %>% filter(category=="No") %>% group_by(decile) %>% count(category,name = "n_no")
tmp2 = dat %>% filter(category=="Yes") %>% group_by(decile) %>% count(category,name = "n_yes")
cnt = data.table(merge(tmp1,tmp2,by=c("decile"),all = T) %>% dplyr::select("decile","n_no","n_yes"))
kable(cnt,align = 'c',caption = "amp(1q) PSL decile counts")
amp(1q) PSL decile counts
| 1 |
51 |
2 |
| 2 |
52 |
NA |
| 3 |
47 |
6 |
| 4 |
39 |
13 |
| 5 |
47 |
6 |
| 6 |
33 |
19 |
| 7 |
30 |
22 |
| 8 |
12 |
41 |
| 9 |
5 |
47 |
| 10 |
7 |
46 |
# calculate hazards ratios
a = cnt[decile%in%c(10),n_yes]
b = cnt[decile%in%c(10),n_no]
c = sum(cnt[decile%in%c(5,6),n_yes])
d = sum(cnt[decile%in%c(5,6),n_no])
hr10 = (a/b)/(c/d)
se10 = sqrt((1/a)+(1/b)+(1/c)+(1/d))
ci10_l = exp(log(hr10)-1.96*se10)
ci10_u = exp(log(hr10)+1.96*se10)
paste0("Odds Ratio: amp(1q) PSL in decile 10 compared with deciles 5 + 6 = ",round(hr10,digits = 2)," (",round(ci10_l,digits = 2),"-",round(ci10_u,digits = 2),")")
## [1] "Odds Ratio: amp(1q) PSL in decile 10 compared with deciles 5 + 6 = 21.03 (8.44-52.41)"
t(4;14)
fit = mod.risk$t414
psl = rowSums(data.matrix(fit$data[,-1]) %*% diag(coef(fit)[-1])) + coef(fit)[1] # multiple each PC by the PC beta, sum across PCs, add the intercept beta
# check PSL calculation
unique(round(fit$fitted.values,12) == round(exp(psl)/(1+exp(psl)),12))
## [1] TRUE
#data.table(PSL=psl,fitted_values=fit$fitted.values,ePSL=exp(psl)/(1+exp(psl))) # view data table
# split into decile
dcl = cut(psl,quantile(psl,c(0,1/10,2/10,3/10,4/10,5/10,6/10,7/10,8/10,9/10,1)),include.lowest = T,labels = F)
dat = data.table(category=fit$data$D_TRI_CF_ABNORMALITYPR3,psl=psl,decile=dcl)
# plot distribution
ggplot(dat) + geom_dotplot(aes(x=psl,fill=category),binwidth = 1/5,color="white") + theme_classic()

# count number in each category by decile
tmp1 = dat %>% filter(category=="No") %>% group_by(decile) %>% count(category,name = "n_no")
tmp2 = dat %>% filter(category=="Yes") %>% group_by(decile) %>% count(category,name = "n_yes")
cnt = data.table(merge(tmp1,tmp2,by=c("decile"),all = T) %>% dplyr::select("decile","n_no","n_yes"))
kable(cnt,align = 'c',caption = "t(4;14) PSL decile counts")
t(4;14) PSL decile counts
| 1 |
59 |
1 |
| 2 |
57 |
2 |
| 3 |
55 |
4 |
| 4 |
56 |
4 |
| 5 |
55 |
4 |
| 6 |
54 |
5 |
| 7 |
57 |
3 |
| 8 |
52 |
7 |
| 9 |
29 |
30 |
| 10 |
12 |
48 |
# calculate hazards ratios
a = cnt[decile%in%c(10),n_yes]
b = cnt[decile%in%c(10),n_no]
c = sum(cnt[decile%in%c(5,6),n_yes])
d = sum(cnt[decile%in%c(5,6),n_no])
hr10 = (a/b)/(c/d)
se10 = sqrt((1/a)+(1/b)+(1/c)+(1/d))
ci10_l = exp(log(hr10)-1.96*se10)
ci10_u = exp(log(hr10)+1.96*se10)
paste0("Odds Ratio: t(4;14) PSL in decile 10 compared with deciles 5 + 6 = ",round(hr10,digits = 2)," (",round(ci10_l,digits = 2),"-",round(ci10_u,digits = 2),")")
## [1] "Odds Ratio: t(4;14) PSL in decile 10 compared with deciles 5 + 6 = 48.44 (19.14-122.61)"
t(11;14)
fit = mod.risk$t1114
psl = rowSums(data.matrix(fit$data[,-1]) %*% diag(coef(fit)[-1])) + coef(fit)[1] # multiple each PC by the PC beta, sum across PCs, add the intercept beta
# check PSL calculation
unique(round(fit$fitted.values,12) == round(exp(psl)/(1+exp(psl)),12))
## [1] TRUE
#data.table(PSL=psl,fitted_values=fit$fitted.values,ePSL=exp(psl)/(1+exp(psl))) # view data table
# split into decile
dcl = cut(psl,quantile(psl,c(0,1/10,2/10,3/10,4/10,5/10,6/10,7/10,8/10,9/10,1)),include.lowest = T,labels = F)
dat = data.table(category=fit$data$D_TRI_CF_ABNORMALITYPR6,psl=psl,decile=dcl)
# plot distribution
ggplot(dat) + geom_dotplot(aes(x=psl,fill=category),binwidth = 1/5,color="white") + theme_classic()

# count number in each category by decile
tmp1 = dat %>% filter(category=="No") %>% group_by(decile) %>% count(category,name = "n_no")
tmp2 = dat %>% filter(category=="Yes") %>% group_by(decile) %>% count(category,name = "n_yes")
cnt = data.table(merge(tmp1,tmp2,by=c("decile"),all = T) %>% dplyr::select("decile","n_no","n_yes"))
kable(cnt,align = 'c',caption = "t(11;14) PSL decile counts")
t(11;14) PSL decile counts
| 1 |
59 |
1 |
| 2 |
54 |
5 |
| 3 |
53 |
6 |
| 4 |
54 |
6 |
| 5 |
54 |
5 |
| 6 |
55 |
4 |
| 7 |
49 |
11 |
| 8 |
36 |
23 |
| 9 |
18 |
41 |
| 10 |
9 |
51 |
# calculate hazards ratios
a = cnt[decile%in%c(10),n_yes]
b = cnt[decile%in%c(10),n_no]
c = sum(cnt[decile%in%c(5,6),n_yes])
d = sum(cnt[decile%in%c(5,6),n_no])
hr10 = (a/b)/(c/d)
se10 = sqrt((1/a)+(1/b)+(1/c)+(1/d))
ci10_l = exp(log(hr10)-1.96*se10)
ci10_u = exp(log(hr10)+1.96*se10)
paste0("Odds Ratio: t(11;14) PSL in decile 10 compared with deciles 5 + 6 = ",round(hr10,digits = 2)," (",round(ci10_l,digits = 2),"-",round(ci10_u,digits = 2),")")
## [1] "Odds Ratio: t(11;14) PSL in decile 10 compared with deciles 5 + 6 = 68.63 (25.71-183.22)"
ISS
fit = mod.risk$iss
psl = rowSums(data.matrix(fit$model[,-1]) %*% diag(coef(fit))) # multiple each PC by the PC beta, sum across PCs
# check PSL calculation
unique(round(fit$lp,12) == round(psl,12))
## [1] TRUE
#data.table(psl,fit$lp)
# split into deciles
dcl = cut(psl,quantile(psl,c(0,1/10,2/10,3/10,4/10,5/10,6/10,7/10,8/10,9/10,1)),include.lowest = T,labels = F)
dat = data.table(category=fit$model$ISS,psl=psl,decile=dcl)
# plot distribution
ggplot(dat) + geom_dotplot(aes(x=psl,fill=category),binwidth = 1/10,color="white") + theme_classic()

# count number in each category by decile
tmp1 = dat %>% filter(category==1) %>% group_by(decile) %>% count(category,name = "n_1")
tmp2 = dat %>% filter(category==2) %>% group_by(decile) %>% count(category,name = "n_2")
tmp3 = dat %>% filter(category==3) %>% group_by(decile) %>% count(category,name = "n_3")
cnt = data.table(merge(merge(tmp1,tmp2,by=c("decile"),all = T),tmp3,by="decile",all=T))[,c("decile","n_1","n_2","n_3")]
kable(cnt,align = 'c',caption = "ISS PSL decile counts")
ISS PSL decile counts
| 1 |
52 |
17 |
6 |
| 2 |
36 |
33 |
6 |
| 3 |
37 |
26 |
11 |
| 4 |
30 |
29 |
16 |
| 5 |
34 |
27 |
13 |
| 6 |
26 |
30 |
19 |
| 7 |
17 |
32 |
25 |
| 8 |
8 |
39 |
28 |
| 9 |
14 |
21 |
39 |
| 10 |
10 |
15 |
50 |
# calculate hazards ratios
a = cnt[decile%in%c(10),n_3]
b = cnt[decile%in%c(10),n_1]
c = sum(cnt[decile%in%c(5,6),n_3])
d = sum(cnt[decile%in%c(5,6),n_1])
hr10 = (a/b)/(c/d)
se10 = sqrt((1/a)+(1/b)+(1/c)+(1/d))
ci10_l = exp(log(hr10)-1.96*se10)
ci10_u = exp(log(hr10)+1.96*se10)
paste0("Odds Ratio: ISS PSL in decile 10 compared with deciles 5 + 6 for stage 1 v 3 = ",round(hr10,digits = 2)," (",round(ci10_l,digits = 2),"-",round(ci10_u,digits = 2),")")
## [1] "Odds Ratio: ISS PSL in decile 10 compared with deciles 5 + 6 for stage 1 v 3 = 9.38 (4.2-20.93)"
Overall Survival (OS)
# multiply each PC by the PC beta, sum across PCs
psl = rowSums(data.matrix(os$data[,-c(1:2)]) %*% diag(coef(os$coxph)))
# check PSL calculation
unique(round(os$coxph$linear.predictors,12) == round(psl,12))
## [1] TRUE
#data.table(psl,os$coxph$linear.predictors)
# split into deciles
dcl = cut(psl,quantile(psl,c(0,1/10,2/10,3/10,4/10,5/10,6/10,7/10,8/10,9/10,1)),
include.lowest = T,labels = F)
dat = data.table(os$data[,c(1:2)],psl=psl,decile=dcl)
OS at 1 year
# is patient dead, alive, or last seen prior to cutoff?
dat$category = if_else(dat$ttcos<=365 & dat$censos==1,"dead",
if_else(dat$ttcos>365,"alive","censored"))
dat
## ttcos censos psl decile category
## 1: 2300 0 -0.3011479 5 alive
## 2: 2263 0 -0.9262697 2 alive
## 3: 995 0 -1.8710216 1 alive
## 4: 2348 0 -1.6208278 1 alive
## 5: 1670 1 1.9406145 10 alive
## ---
## 763: 203 0 -0.1891478 5 censored
## 764: 163 0 1.3326660 10 censored
## 765: 198 0 -0.2040768 5 censored
## 766: 189 0 -0.6716711 3 censored
## 767: 170 0 0.7548512 9 censored
# plot distribution
ggplot(dat) + geom_dotplot(aes(x=psl,fill=category),binwidth = 1/10,color="white") + theme_classic()

# count number in each category by decile
tmp1 = dat %>% filter(category=="dead") %>% group_by(decile) %>% count(category,name = "n_dead")
tmp2 = dat %>% filter(category=="alive") %>% group_by(decile) %>% count(category,name = "n_alive")
tmp3 = dat %>% filter(category=="censored") %>% group_by(decile) %>% count(category,name="n_censored")
cnt = data.table(merge(merge(tmp1,tmp2,by=c("decile"),all = T),tmp3,by="decile",all=T))[,c("decile","n_dead","n_alive","n_censored")]
# average days to death per decile - ignore censored values
avg_all = dat %>% filter() %>% group_by(decile) %>% summarise_at(vars(ttcos),mean)
avg_c1 = dat %>% filter(censos==1) %>% group_by(decile) %>% summarise_at(vars(ttcos),mean)
avg_c0 = dat %>% filter(censos==0) %>% group_by(decile) %>% summarise_at(vars(ttcos),mean)
# number of events per decile
evts = dat %>% filter(censos==1) %>% group_by(decile) %>% count(censos,name="n_events")
c2 = data.table(merge(merge(avg_all,avg_c0,by = "decile"),merge(avg_c1,evts,by="decile")))[,-"censos"]
colnames(c2) = c("decile","avg_days","avg_days_censored","avg_days_dead","events")
kable(merge(cnt,c2,by="decile"),align = 'c',
caption = "Overall Surival PSL decile counts at 365 days")
Overall Surival PSL decile counts at 365 days
| 1 |
2 |
66 |
9 |
1123.8701 |
1188.0714 |
481.8571 |
7 |
| 2 |
2 |
64 |
11 |
1146.6234 |
1183.7500 |
612.0000 |
5 |
| 3 |
2 |
62 |
12 |
1059.1316 |
1101.6818 |
778.3000 |
10 |
| 4 |
3 |
61 |
13 |
1072.0130 |
1125.2388 |
715.4000 |
10 |
| 5 |
4 |
58 |
15 |
1024.0649 |
1089.6667 |
630.4545 |
11 |
| 6 |
3 |
61 |
12 |
915.7105 |
945.5714 |
771.0000 |
13 |
| 7 |
3 |
67 |
7 |
1042.2857 |
1125.1967 |
726.1875 |
16 |
| 8 |
7 |
60 |
9 |
987.4737 |
1085.6863 |
787.1200 |
25 |
| 9 |
10 |
58 |
9 |
843.5065 |
988.4444 |
639.6875 |
32 |
| 10 |
22 |
38 |
17 |
507.9610 |
551.6296 |
484.3800 |
50 |
# calculate hazards ratios
a = cnt[decile%in%c(10),n_dead]
b = cnt[decile%in%c(10),n_alive]
c = sum(cnt[decile%in%c(5,6),n_dead])
d = sum(cnt[decile%in%c(5,6),n_alive])
hr10 = (a/b)/(c/d)
se10 = sqrt((1/a)+(1/b)+(1/c)+(1/d))
ci10_l = exp(log(hr10)-1.96*se10)
ci10_u = exp(log(hr10)+1.96*se10)
paste0("HR OS PSL decile 10 compared with deciles 5 + 6 at 365 days = ",round(hr10,digits = 2)," (",round(ci10_l,digits = 2),"-",round(ci10_u,digits = 2),")")
## [1] "HR OS PSL decile 10 compared with deciles 5 + 6 at 365 days = 9.84 (3.9-24.84)"
OS at 3 years
# is patient dead, alive, or last seen prior to cutoff
dat$category = if_else(dat$ttcos<=365*3 & dat$censos==1,"dead",
if_else(dat$ttcos>365*3,"alive","censored"))
dat
## ttcos censos psl decile category
## 1: 2300 0 -0.3011479 5 alive
## 2: 2263 0 -0.9262697 2 alive
## 3: 995 0 -1.8710216 1 censored
## 4: 2348 0 -1.6208278 1 alive
## 5: 1670 1 1.9406145 10 alive
## ---
## 763: 203 0 -0.1891478 5 censored
## 764: 163 0 1.3326660 10 censored
## 765: 198 0 -0.2040768 5 censored
## 766: 189 0 -0.6716711 3 censored
## 767: 170 0 0.7548512 9 censored
# plot distribution
ggplot(dat) + geom_dotplot(aes(x=psl,fill=category),binwidth = 1/10,color="white") + theme_classic()

# count number in each category by decile
tmp1 = dat %>% filter(category=="dead") %>% group_by(decile) %>% count(category,name = "n_dead")
tmp2 = dat %>% filter(category=="alive") %>% group_by(decile) %>% count(category,name = "n_alive")
tmp3 = dat %>% filter(category=="censored") %>% group_by(decile) %>% count(category,name="n_censored")
cnt = data.table(merge(merge(tmp1,tmp2,by=c("decile"),all = T),tmp3,by="decile",all=T))[,c("decile","n_dead","n_alive","n_censored")]
# average days to death per decile - ignore censored values
avg_all = dat %>% filter() %>% group_by(decile) %>% summarise_at(vars(ttcos),mean)
avg_c1 = dat %>% filter(censos==1) %>% group_by(decile) %>% summarise_at(vars(ttcos),mean)
avg_c0 = dat %>% filter(censos==0) %>% group_by(decile) %>% summarise_at(vars(ttcos),mean)
# number of events per decile
evts = dat %>% filter(censos==1) %>% group_by(decile) %>% count(censos,name="n_events")
c2 = data.table(merge(merge(avg_all,avg_c0,by = "decile"),merge(avg_c1,evts,by="decile")))[,-"censos"]
colnames(c2) = c("decile","avg_days","avg_days_censored","avg_days_dead","events")
kable(merge(cnt,c2,by="decile"),align = 'c',
caption = "Overall Surival PSL decile counts at 3 years")
Overall Surival PSL decile counts at 3 years
| 1 |
7 |
43 |
27 |
1123.8701 |
1188.0714 |
481.8571 |
7 |
| 2 |
4 |
51 |
22 |
1146.6234 |
1183.7500 |
612.0000 |
5 |
| 3 |
7 |
43 |
26 |
1059.1316 |
1101.6818 |
778.3000 |
10 |
| 4 |
9 |
39 |
29 |
1072.0130 |
1125.2388 |
715.4000 |
10 |
| 5 |
9 |
38 |
30 |
1024.0649 |
1089.6667 |
630.4545 |
11 |
| 6 |
10 |
29 |
37 |
915.7105 |
945.5714 |
771.0000 |
13 |
| 7 |
12 |
38 |
27 |
1042.2857 |
1125.1967 |
726.1875 |
16 |
| 8 |
19 |
33 |
24 |
987.4737 |
1085.6863 |
787.1200 |
25 |
| 9 |
27 |
28 |
22 |
843.5065 |
988.4444 |
639.6875 |
32 |
| 10 |
45 |
10 |
22 |
507.9610 |
551.6296 |
484.3800 |
50 |
# calculate hazards ratios
a = cnt[decile%in%c(10),n_dead]
b = cnt[decile%in%c(10),n_alive]
c = sum(cnt[decile%in%c(5,6),n_dead])
d = sum(cnt[decile%in%c(5,6),n_alive])
hr10 = (a/b)/(c/d)
se10 = sqrt((1/a)+(1/b)+(1/c)+(1/d))
ci10_l = exp(log(hr10)-1.96*se10)
ci10_u = exp(log(hr10)+1.96*se10)
paste0("HR OS PSL decile 10 compared with deciles 5 + 6 at ",365*3," days = ",round(hr10,digits = 2)," (",round(ci10_l,digits = 2),"-",round(ci10_u,digits = 2),")")
## [1] "HR OS PSL decile 10 compared with deciles 5 + 6 at 1095 days = 15.87 (6.76-37.27)"
Time to first line treatment failure (TTF)
tf_psl = rowSums(data.matrix(tf$data[,-c(1:2)]) %*% diag(coef(tf$coxph))) # multiple each PC by the PC beta, sum across PCs
# check PSL calculation
unique(round(tf$coxph$linear.predictors,12) == round(tf_psl,12))
## [1] TRUE
#data.table(tf1_psl,tf1$coxph$linear.predictors)
# split into decile
dcl = cut(psl,quantile(psl,c(0,1/10,2/10,3/10,4/10,5/10,6/10,7/10,8/10,9/10,1)),include.lowest = T,labels = F)
dat = data.table(tf$data[,c(1:2)],psl=psl,decile=dcl)
TTF at 1 year
dat$category = if_else(dat$ttctf1<=365 & dat$censtf1==1,"fail",if_else(dat$ttctf1>365,"okay","censored"))
# describe if treatment has failed, not failed, or patient data censored
dat
## ttctf1 censtf1 psl decile category
## 1: 84 1 -0.3011479 5 fail
## 2: 582 1 -0.9262697 2 okay
## 3: 995 0 -1.8710216 1 okay
## 4: 283 1 -1.6208278 1 fail
## 5: 1329 1 1.9406145 10 okay
## ---
## 763: 57 1 -0.1891478 5 fail
## 764: 58 1 1.3326660 10 fail
## 765: 198 0 -0.2040768 5 censored
## 766: 189 0 -0.6716711 3 censored
## 767: 170 0 0.7548512 9 censored
# plot distribution
ggplot(dat) + geom_dotplot(aes(x=psl,fill=category),binwidth = 1/10,color="white") + theme_classic()

# count number in each category by decile
tmp1 = dat %>% filter(category=="fail") %>% group_by(decile) %>% count(category,name = "n_fail")
tmp2 = dat %>% filter(category=="okay") %>% group_by(decile) %>% count(category,name = "n_okay")
tmp3 = dat %>% filter(category=="censored") %>% group_by(decile) %>% count(category,name="n_censored")
cnt = data.table(merge(merge(tmp1,tmp2,by=c("decile"),all = T),tmp3,by="decile",all=T))[,c("decile","n_fail","n_okay","n_censored")]
# average days to death per decile - ignore censored values
avg_all = dat %>% filter() %>% group_by(decile) %>% summarise_at(vars(ttctf1),mean)
avg_c1 = dat %>% filter(censtf1==1) %>% group_by(decile) %>% summarise_at(vars(ttctf1),mean)
avg_c0 = dat %>% filter(censtf1==0) %>% group_by(decile) %>% summarise_at(vars(ttctf1),mean)
# number of events per decile
evts = dat %>% filter(censtf1==1) %>% group_by(decile) %>% count(censtf1,name="n_events")
c2 = data.table(merge(merge(avg_all,avg_c0,by = "decile"),merge(avg_c1,evts,by="decile")))[,-"censtf1"]
colnames(c2) = c("decile","avg_days","avg_days_censored","avg_days_fail","events")
kable(merge(cnt,c2,by="decile"),align = 'c',
caption = "Time to first treatment fail PSL decile counts at 365 days")
Time to first treatment fail PSL decile counts at 365 days
| 1 |
17 |
51 |
9 |
759.5455 |
979.2000 |
450.6562 |
32 |
| 2 |
10 |
57 |
10 |
859.4416 |
1044.6471 |
496.1538 |
26 |
| 3 |
14 |
49 |
13 |
763.8947 |
921.4222 |
535.2258 |
31 |
| 4 |
15 |
51 |
11 |
769.5195 |
947.0682 |
532.7879 |
33 |
| 5 |
18 |
45 |
14 |
659.3117 |
844.5714 |
437.0000 |
35 |
| 6 |
16 |
47 |
13 |
665.6316 |
758.8462 |
567.3784 |
37 |
| 7 |
19 |
50 |
8 |
718.9351 |
933.0000 |
520.9250 |
40 |
| 8 |
20 |
45 |
11 |
618.7500 |
786.5556 |
467.7250 |
40 |
| 9 |
27 |
38 |
12 |
484.2338 |
664.8889 |
386.6800 |
50 |
| 10 |
31 |
21 |
25 |
319.8442 |
307.3438 |
328.7333 |
45 |
# calculate hazards ratios
a = cnt[decile%in%c(10),n_fail]
b = cnt[decile%in%c(10),n_okay]
c = sum(cnt[decile%in%c(5,6),n_fail])
d = sum(cnt[decile%in%c(5,6),n_okay])
hr10 = (a/b)/(c/d)
se10 = sqrt((1/a)+(1/b)+(1/c)+(1/d))
ci10_l = exp(log(hr10)-1.96*se10)
ci10_u = exp(log(hr10)+1.96*se10)
paste0("HR TTF PSL decile 10 compared with deciles 5 + 6 at 365 days = ",
round(hr10,digits = 2)," (",round(ci10_l,digits = 2),"-",round(ci10_u,digits = 2),")")
## [1] "HR TTF PSL decile 10 compared with deciles 5 + 6 at 365 days = 3.99 (2.02-7.88)"
TTF at 3 years
dat$category = if_else(dat$ttctf1<=365*3 & dat$censtf1==1,"fail",if_else(dat$ttctf1>365*3,"okay","censored"))
# describe if treatment has failed, not failed, or patient data censored
dat
## ttctf1 censtf1 psl decile category
## 1: 84 1 -0.3011479 5 fail
## 2: 582 1 -0.9262697 2 fail
## 3: 995 0 -1.8710216 1 censored
## 4: 283 1 -1.6208278 1 fail
## 5: 1329 1 1.9406145 10 okay
## ---
## 763: 57 1 -0.1891478 5 fail
## 764: 58 1 1.3326660 10 fail
## 765: 198 0 -0.2040768 5 censored
## 766: 189 0 -0.6716711 3 censored
## 767: 170 0 0.7548512 9 censored
# plot distribution
ggplot(dat) + geom_dotplot(aes(x=psl,fill=category),binwidth = 1/10,color="white") + theme_classic()

# count number in each category by decile
tmp1 = dat %>% filter(category=="fail") %>% group_by(decile) %>% count(category,name = "n_fail")
tmp2 = dat %>% filter(category=="okay") %>% group_by(decile) %>% count(category,name = "n_okay")
tmp3 = dat %>% filter(category=="censored") %>% group_by(decile) %>% count(category,name="n_censored")
cnt = data.table(merge(merge(tmp1,tmp2,by=c("decile"),all = T),tmp3,by="decile",all=T))[,c("decile","n_fail","n_okay","n_censored")]
# average days to death per decile - ignore censored values
avg_all = dat %>% filter() %>% group_by(decile) %>% summarise_at(vars(ttctf1),mean)
avg_c1 = dat %>% filter(censtf1==1) %>% group_by(decile) %>% summarise_at(vars(ttctf1),mean)
avg_c0 = dat %>% filter(censtf1==0) %>% group_by(decile) %>% summarise_at(vars(ttctf1),mean)
# number of events per decile
evts = dat %>% filter(censtf1==1) %>% group_by(decile) %>% count(censtf1,name="n_events")
c2 = data.table(merge(merge(avg_all,avg_c0,by = "decile"),merge(avg_c1,evts,by="decile")))[,-"censtf1"]
colnames(c2) = c("decile","avg_days","avg_days_censored","avg_days_fail","events")
# remove censored before X days
kable(merge(cnt,c2,by="decile"),align = 'c',
caption = "Time to first treatment fail PSL decile counts at 3 years")
Time to first treatment fail PSL decile counts at 3 years
| 1 |
28 |
23 |
26 |
759.5455 |
979.2000 |
450.6562 |
32 |
| 2 |
26 |
31 |
20 |
859.4416 |
1044.6471 |
496.1538 |
26 |
| 3 |
29 |
25 |
22 |
763.8947 |
921.4222 |
535.2258 |
31 |
| 4 |
28 |
24 |
25 |
769.5195 |
947.0682 |
532.7879 |
33 |
| 5 |
33 |
19 |
25 |
659.3117 |
844.5714 |
437.0000 |
35 |
| 6 |
33 |
17 |
26 |
665.6316 |
758.8462 |
567.3784 |
37 |
| 7 |
36 |
17 |
24 |
718.9351 |
933.0000 |
520.9250 |
40 |
| 8 |
36 |
14 |
26 |
618.7500 |
786.5556 |
467.7250 |
40 |
| 9 |
49 |
8 |
20 |
484.2338 |
664.8889 |
386.6800 |
50 |
| 10 |
42 |
5 |
30 |
319.8442 |
307.3438 |
328.7333 |
45 |
# calculate hazards ratios
a = cnt[decile%in%c(10),n_fail]
b = cnt[decile%in%c(10),n_okay]
c = sum(cnt[decile%in%c(5,6),n_fail])
d = sum(cnt[decile%in%c(5,6),n_okay])
hr10 = (a/b)/(c/d)
se10 = sqrt((1/a)+(1/b)+(1/c)+(1/d))
ci10_l = exp(log(hr10)-1.96*se10)
ci10_u = exp(log(hr10)+1.96*se10)
paste0("HR TTF PSL decile 10 compared with deciles 5 + 6 at ",365*3,
" days = ",round(hr10,digits = 2)," (",
round(ci10_l,digits = 2),"-",round(ci10_u,digits = 2),")")
## [1] "HR TTF PSL decile 10 compared with deciles 5 + 6 at 1095 days = 4.58 (1.66-12.61)"
Age
fit = lm.age
psl = rowSums(data.matrix(fit$model[,-1]) %*% diag(coef(fit)[-1])) + coef(fit)[1] # multiple each PC by the PC beta, sum across PCs, add the intercept beta
# check PSL calculation
unique(round(fit$fitted.values,10) == round(psl,10))
## [1] TRUE
#data.table(PSL=psl,fitted_values=fit$fitted.values) # view data table
# split into decile
dcl = cut(psl,quantile(psl,c(0,1/10,2/10,3/10,4/10,5/10,6/10,7/10,8/10,9/10,1)),include.lowest = T,labels = F)
dat = data.table(age=fit$model$D_PT_age,psl=psl,decile=dcl)
dat$category = as.factor(if_else(dat$age<60,1,2)) # two categories: <60 or >=60 at diagnosis
# plot distribution
ggplot(dat) + geom_dotplot(aes(x=psl,fill=category),binwidth = 1/2,color="white") + theme_classic()

# count number in each category by decile
tmp1 = dat %>% filter(category=="1") %>% group_by(decile) %>% count(category,name = "n_lt60")
tmp2 = dat %>% filter(category=="2") %>% group_by(decile) %>% count(category,name = "n_ge60")
avg = dat %>% filter() %>% group_by(decile) %>% summarise_at(vars(age),mean) # average days to death per decile - ignore censored values
cnt = data.table(merge(merge(tmp1,tmp2,by=c("decile"),all = T),avg,by="decile"))[,c("decile","n_lt60","n_ge60","age")]
kable(cnt,align = 'c',caption = "Age PSL decile counts")
Age PSL decile counts
| 1 |
52 |
25 |
54.57143 |
| 2 |
38 |
39 |
58.62338 |
| 3 |
33 |
43 |
59.61842 |
| 4 |
32 |
45 |
60.53247 |
| 5 |
27 |
50 |
62.58442 |
| 6 |
33 |
43 |
62.35526 |
| 7 |
21 |
56 |
64.98701 |
| 8 |
16 |
60 |
66.38158 |
| 9 |
17 |
60 |
67.59740 |
| 10 |
5 |
72 |
70.46753 |
# calculate hazards ratios
a = cnt[decile%in%c(1),n_lt60]
b = cnt[decile%in%c(1),n_ge60]
c = sum(cnt[decile%in%c(5,6),n_lt60])
d = sum(cnt[decile%in%c(5,6),n_ge60])
hr10 = (a/b)/(c/d)
se10 = sqrt((1/a)+(1/b)+(1/c)+(1/d))
ci10_l = exp(log(hr10)-1.96*se10)
ci10_u = exp(log(hr10)+1.96*se10)
paste0("Odds Ratio: Age PSL in decile 1 compared with deciles 5 + 6 for <60 or >=60 = ",round(hr10,digits = 2)," (",round(ci10_l,digits = 2),"-",round(ci10_u,digits = 2),")")
## [1] "Odds Ratio: Age PSL in decile 1 compared with deciles 5 + 6 for <60 or >=60 = 3.22 (1.81-5.74)"
Gender
fit = glm.gender
psl = rowSums(data.matrix(fit$data[,-1]) %*% diag(coef(fit)[-1])) + coef(fit)[1] # multiple each PC by the PC beta, sum across PCs, add the intercept beta
# check PSL calculation
unique(round(fit$fitted.values,12) == round(exp(psl)/(1+exp(psl)),12))
## [1] TRUE
#data.table(PSL=psl,fitted_values=fit$fitted.values,ePSL=exp(psl)/(1+exp(psl))) # view data table
# split into decile
dcl = cut(psl,quantile(psl,c(0,1/10,2/10,3/10,4/10,5/10,6/10,7/10,8/10,9/10,1)),include.lowest = T,labels = F)
dat = data.table(category=fit$data$D_PT_gender,psl=psl,decile=dcl)
# plot distribution
ggplot(dat) + geom_dotplot(aes(x=psl,fill=category),binwidth = 1/10,color="white") + theme_classic()

# count number in each category by decile
tmp1 = dat %>% filter(category=="1") %>% group_by(decile) %>% count(category,name = "n_1")
tmp2 = dat %>% filter(category=="2") %>% group_by(decile) %>% count(category,name = "n_2")
cnt = data.table(merge(tmp1,tmp2,by=c("decile"),all = T) %>% select("decile","n_1","n_2"))
kable(cnt,align = 'c',caption = "Gender PSL decile counts")
Gender PSL decile counts
| 1 |
65 |
12 |
| 2 |
67 |
10 |
| 3 |
52 |
24 |
| 4 |
54 |
23 |
| 5 |
48 |
29 |
| 6 |
41 |
35 |
| 7 |
35 |
42 |
| 8 |
41 |
35 |
| 9 |
33 |
44 |
| 10 |
17 |
60 |
# calculate hazards ratios
a = cnt[decile%in%c(10),n_2]
b = cnt[decile%in%c(10),n_1]
c = sum(cnt[decile%in%c(5,6),n_2])
d = sum(cnt[decile%in%c(5,6),n_1])
hr10 = (a/b)/(c/d)
se10 = sqrt((1/a)+(1/b)+(1/c)+(1/d))
ci10_l = exp(log(hr10)-1.96*se10)
ci10_u = exp(log(hr10)+1.96*se10)
paste0("Odds Ratio: Gender PSL in decile 10 compared with deciles 5 + 6 = ",round(hr10,digits = 2)," (",round(ci10_l,digits = 2),"-",round(ci10_u,digits = 2),")")
## [1] "Odds Ratio: Gender PSL in decile 10 compared with deciles 5 + 6 = 4.91 (2.62-9.19)"
Race
fit = glm.race
psl = rowSums(data.matrix(fit$data[,-1]) %*% diag(coef(fit)[-1])) + coef(fit)[1] # multiple each PC by the PC beta, sum across PCs, add the intercept beta
# check PSL calculation
unique(round(fit$fitted.values,12) == round(exp(psl)/(1+exp(psl)),12))
## [1] TRUE
#data.table(PSL=psl,fitted_values=fit$fitted.values,ePSL=exp(psl)/(1+exp(psl))) # view data table
# split into decile
dcl = cut(psl,quantile(psl,c(0,1/10,2/10,3/10,4/10,5/10,6/10,7/10,8/10,9/10,1)),include.lowest = T,labels = F)
dat = data.table(category=fit$data$D_PT_race,psl=psl,decile=dcl)
# plot distribution
ggplot(dat) + geom_dotplot(aes(x=psl,fill=category),binwidth = 1/5,color="white") + theme_classic()

# count number in each category by decile
tmp1 = dat %>% filter(category=="1") %>% group_by(decile) %>% count(category,name = "n_1")
tmp2 = dat %>% filter(category=="2") %>% group_by(decile) %>% count(category,name = "n_2")
cnt = data.table(merge(tmp1,tmp2,by=c("decile"),all = T) %>% select("decile","n_1","n_2"))
kable(cnt,align = 'c',caption = "Race PSL decile counts")
Race PSL decile counts
| 1 |
61 |
NA |
| 2 |
57 |
3 |
| 3 |
57 |
3 |
| 4 |
57 |
3 |
| 5 |
54 |
6 |
| 6 |
53 |
7 |
| 7 |
52 |
8 |
| 8 |
42 |
18 |
| 9 |
40 |
20 |
| 10 |
28 |
32 |
# calculate hazards ratios
a = cnt[decile%in%c(10),n_2]
b = cnt[decile%in%c(10),n_1]
c = sum(cnt[decile%in%c(5,6),n_2])
d = sum(cnt[decile%in%c(5,6),n_1])
hr10 = (a/b)/(c/d)
se10 = sqrt((1/a)+(1/b)+(1/c)+(1/d))
ci10_l = exp(log(hr10)-1.96*se10)
ci10_u = exp(log(hr10)+1.96*se10)
paste0("Odds Ratio: Race PSL in decile 10 compared with deciles 5 + 6 = ",round(hr10,digits = 2)," (",round(ci10_l,digits = 2),"-",round(ci10_u,digits = 2),")")
## [1] "Odds Ratio: Race PSL in decile 10 compared with deciles 5 + 6 = 9.41 (4.37-20.26)"
Ethnicity
fit = glm.ethnic
psl = rowSums(data.matrix(fit$data[,-1]) %*% diag(coef(fit)[-1])) + coef(fit)[1] # multiple each PC by the PC beta, sum across PCs, add the intercept beta
# check PSL calculation
unique(round(fit$fitted.values,12) == round(exp(psl)/(1+exp(psl)),12))
## [1] TRUE
#data.table(PSL=psl,fitted_values=fit$fitted.values,ePSL=exp(psl)/(1+exp(psl))) # view data table
# split into decile
dcl = cut(psl,quantile(psl,c(0,1/10,2/10,3/10,4/10,5/10,6/10,7/10,8/10,9/10,1)),include.lowest = T,labels = F)
dat = data.table(category=fit$data$D_PT_ethnic,psl=psl,decile=dcl)
# plot distribution
ggplot(dat) + geom_dotplot(aes(x=psl,fill=category),binwidth = 1/5,color="white") + theme_classic()

# count number in each category by decile
tmp1 = dat %>% filter(category=="1") %>% group_by(decile) %>% count(category,name = "n_1")
tmp2 = dat %>% filter(category=="2") %>% group_by(decile) %>% count(category,name = "n_2")
cnt = data.table(merge(tmp1,tmp2,by=c("decile"),all = T) %>% select("decile","n_1","n_2"))
kable(cnt,align = 'c',caption = "Ethnicity PSL decile counts")
Ethnicity PSL decile counts
| 1 |
28 |
35 |
| 2 |
6 |
56 |
| 3 |
6 |
57 |
| 4 |
4 |
58 |
| 5 |
7 |
56 |
| 6 |
3 |
59 |
| 7 |
2 |
60 |
| 8 |
2 |
61 |
| 9 |
1 |
61 |
| 10 |
NA |
63 |
cnt[is.na(cnt)] <- 0 # replace NA with 0
# calculate hazards ratios
a = cnt[decile%in%c(1),n_1]
b = cnt[decile%in%c(1),n_2]
c = sum(cnt[decile%in%c(5,6),n_1])
d = sum(cnt[decile%in%c(5,6),n_2])
hr10 = (a/b)/(c/d)
se10 = sqrt((1/a)+(1/b)+(1/c)+(1/d))
ci10_l = exp(log(hr10)-1.96*se10)
ci10_u = exp(log(hr10)+1.96*se10)
paste0("Odds Ratio: Ethnicity PSL in decile 1 compared with deciles 5 + 6 = ",round(hr10,digits = 2)," (",round(ci10_l,digits = 2),"-",round(ci10_u,digits = 2),")")
## [1] "Odds Ratio: Ethnicity PSL in decile 1 compared with deciles 5 + 6 = 9.2 (4.07-20.79)"